Jayden Yap Jean Hng p2112790
Generative Adversarial Networks (GANs) were first introduced in 2014 by Ian Goodfellow. It consists of 2 neural networks: a generator network to create new data samples, and a discriminator network that tries to distinguish the generated samples from real ones. The two networks are trained together in an adversarial way, where the generator tries to create samples that fool the discriminator to think it is real, and the discriminator tries to correctly identify the generated samples.
GANs have been applied to a wide range of tasks. One popular dataset for training GANs is the CIFAR-10 dataset, which consists of 60,000 32x32 color images in 10 classes, with 6,000 images per class. There are 50,000 training images and 10,000 test images.
However, training GANs is a challenging task, as the generator and discriminator can easily get stuck, such as mode collapse (where the generator produces limited variations of the same sample) or overfitting (where the generator produces samples that are too similar to the training data). Researchers have proposed various techniques to stabilize GAN training, such as using different loss functions or architectures, but the problem is still an active area of research.
For my project, I want to experiment with different architectures, loss functions, and hyperparameters to train a GAN on the CIFAR-10 dataset, and compare its performance to state-of-the-art models via the FID metric.
!nvidia-smi
# Core imports for GAN training, numerics and visualisation.
import tensorflow as tf
from tensorflow.keras import backend as K
import numpy as np
from matplotlib import pyplot as plt
import math
#using school server 8 GPUS, select one gpu
import os
os.environ["CUDA_VISIBLE_DEVICES"]="7"
# List devices visible to TensorFlow (informational notebook output).
tf.config.list_physical_devices(
device_type=None
)
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# Enable memory growth so TensorFlow allocates GPU memory on demand
# instead of grabbing the whole card up front.
for i in physical_devices:
    tf.config.experimental.set_memory_growth(i, True)
#hide warning for better outputs
import warnings
warnings.filterwarnings("ignore")
%%javascript
// Jupyter cell magic: injects a clickable link into the output area that
// toggles a CSS rule hiding stderr output across the notebook.
(function(on) {
    const e = $("<a>Setup failed</a>");
    const ns = "js_jupyter_suppress_warnings";
    // Reuse the style element if this cell has run before.
    var cssrules = $("#" + ns);
    if(!cssrules.length)
        cssrules = $("<style id='" + ns + "' type='text/css'>div.output_stderr { } </style>").appendTo("head");
    e.click(function() {
        var s = 'Showing';
        cssrules.empty()
        if(on) {
            s = 'Hiding';
            // Hide stderr output areas while in the "Hiding" state.
            cssrules.append("div.output_stderr, div[data-mime-type*='.stderr'] { display:none; }");
        }
        e.text(s + ' warnings (click to toggle)');
        on = !on;
    }).click();
    $(element).append(e);
})(true);
# load and prepare cifar10 training images
def loadRealSamples():
    """Load all 60k CIFAR-10 images (train + test) scaled to [-1, 1] floats."""
    (trainX, _), (testX, _) = tf.keras.datasets.cifar10.load_data()
    # stack the 50k training and 10k test images into one float array
    combined = np.concatenate((trainX.astype('float32'),
                               testX.astype('float32')), axis=0)
    # rescale pixel values from [0, 255] to [-1, 1] to match the tanh output
    return (combined - 127.5) / 127.5
def loadRealLabels():
    """Load all 60k CIFAR-10 labels (train + test) one-hot encoded over 10 classes."""
    (_, trainY), (_, testY) = tf.keras.datasets.cifar10.load_data()
    labels = np.concatenate((trainY, testY), axis=0)
    # one-hot encode the 10 CIFAR-10 classes
    return tf.keras.utils.to_categorical(labels, 10)
#load and prepare only test images
def loadTestSamples():
    """Load only the 10k CIFAR-10 test images scaled to [-1, 1] floats."""
    (_, _), (testX, _) = tf.keras.datasets.cifar10.load_data()
    # rescale pixel values from [0, 255] to [-1, 1]
    return (testX.astype('float32') - 127.5) / 127.5
# select some real image samples
def selectRealSamples(dataset, n_samples):
    """Draw n_samples random images from dataset, labelled as 'real' (all 1s)."""
    # sample random row indices (with replacement)
    idx = np.random.randint(0, dataset.shape[0], n_samples)
    # every selected image is labelled real (1) for the discriminator
    return dataset[idx], np.ones((n_samples, 1))
# generate points in latent space as input for the generator
def generateLatentPoints(latentDim, n_samples):
    """Sample n_samples standard-normal latent vectors of length latentDim."""
    # draw N(0, 1) values and shape them into one row per sample
    return np.random.randn(latentDim * n_samples).reshape(n_samples, latentDim)
# use the generator to generate n fake examples, with class labels
def generateFakeSamples(genModel, latentDim, n_samples):
    """Generate n_samples images from genModel, labelled as 'fake' (all 0s)."""
    latents = generateLatentPoints(latentDim, n_samples)
    # every generated image is labelled fake (0) for the discriminator
    return genModel.predict(latents), np.zeros((n_samples, 1))
# create and save a plot of generated images
def savePlot(examples, epoch, name, n=5, save_frequency=10):
    """Show an n x n grid of generated images; save it under imgs/<name>/
    every save_frequency epochs.

    examples are expected in [-1, 1] and are rescaled to [0, 1] for imshow.
    """
    # scale from [-1,1] to [0,1]
    examples = (examples + 1) / 2.0
    fig = plt.figure(figsize=(5, 5))
    plt.suptitle(f'{name} epoch {epoch+1}')
    # plot images in an n x n grid, axes hidden
    for i in range(n * n):
        plt.subplot(n, n, 1 + i)
        plt.axis('off')
        plt.imshow(examples[i])
    # save plot to file every save_frequency epochs
    if (epoch + 1) % save_frequency == 0:
        subfolder_path = os.path.join(os.getcwd(), 'imgs', f'{name}')
        # exist_ok avoids the check-then-create race of the original
        os.makedirs(subfolder_path, exist_ok=True)
        filename = os.path.join(subfolder_path, f'plot{epoch+1}.png')
        # fixed: was plt.savefig(filename[:]) — the [:] copy was a no-op
        plt.savefig(filename)
    plt.show()
    # fixed: close the figure so figures do not accumulate over many epochs
    plt.close(fig)
# evaluate the discriminator, plot generated images, save generator model
def summarisePerf(epoch, genModel, discModel, dataset, latentDim, n_samples=150, name='model'):
    """Report discriminator accuracy on real vs fake batches and plot fakes."""
    # score the discriminator on a batch of real images
    X_real, y_real = selectRealSamples(dataset, n_samples)
    _, acc_real = discModel.evaluate(X_real, y_real, verbose=0)
    # score the discriminator on a batch of generated images
    x_fake, y_fake = generateFakeSamples(genModel, latentDim, n_samples)
    _, acc_fake = discModel.evaluate(x_fake, y_fake, verbose=0)
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real * 100, acc_fake * 100))
    # visualise (and periodically save) the generated samples
    savePlot(x_fake, epoch, name)
Frechet Inception Distance: Evaluates distribution of generated images by comparing with real ground truth images.
Required libraries in addition to torch; restart the kernel after running the installs below.
# !pip install torchmetrics
# !pip install torch-fidelity
# !pip install --upgrade typing-extensions
# !pip install torchvision
# In case of errors from torch
# pip uninstall torch
# pip cache purge
# pip install torch -f https://download.pytorch.org/whl/torch_stable.html
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
# FID metric on GPU; real-image statistics are kept across fid.reset() calls
fid=FrechetInceptionDistance(reset_real_features=False).to('cuda')
fid.reset()
# real reference set: the 10k CIFAR-10 test images, scaled to [-1, 1]
real_images = loadTestSamples()
real_images = real_images.astype(np.float32)
real_images=torch.from_numpy(np.array(real_images))
#change dtypes, scale back first: [-1, 1] -> [0, 255] uint8 as FID expects
real_images=(real_images+1)*(255/2)
real_images=real_images.type(torch.uint8)
# from 10,000x32x32x3 (NHWC) to 10,000x3x32x32 (NCHW, torch convention)
real_images = real_images.permute(0, 3, 1, 2)
real_images = real_images.to('cuda')
#COMPUTE FID
# create a dataloader with batch size 128
dataloader = torch.utils.data.DataLoader(real_images, batch_size=128, shuffle=True,
drop_last=True
)
# update the FID with real images (their Inception features are cached)
for batch,images in enumerate(dataloader):
    print(f'Updating with real images now...Batch{batch}',end='\r')
    fid.update(images, real=True)
print(f'\nFinished! Ready to take in generated image features')
def computeFID(generator, latentDim):
    """Compute FID between 10k generated images and the cached real features."""
    # clear only the fake-image statistics (real features are retained)
    fid.reset()
    # generate 10k fakes and convert to uint8 NCHW tensors on the GPU
    fake = generateFakeSamples(generator, latentDim, 10000)[0]
    fake = torch.from_numpy(np.array(fake.astype(np.float32)))
    # rescale [-1, 1] -> [0, 255] and cast, as the FID metric expects
    fake = ((fake + 1) * (255 / 2)).type(torch.uint8)
    fake = fake.permute(0, 3, 1, 2).to('cuda')
    # feed the generated images through the metric in batches of 128
    loader = torch.utils.data.DataLoader(fake, batch_size=128, shuffle=True,
                                         drop_last=True)
    print('')
    for batch, images in enumerate(loader):
        print(f'Updating FID with generated images: Batch{batch}', end='\r')
        fid.update(images, real=False)
    print('\n')
    score = fid.compute()
    print(f'FID: {score.item()}')
    return score
We will try to create a DCGAN now. Deep Convolutional Generative Adversarial Networks (DCGANs) are a variant of GANs where the generator and discriminator networks are composed of convolutional layers. DCGANs were proposed in 2016, and they have been used to generate high-quality images, such as faces and realistic landscapes. They are quite popular for CIFAR-10 and are simpler to implement than more complex architectures.
# define the standalone discriminator model
def defineDisc(in_shape=(32, 32, 3)):
    """Build and compile the baseline binary real/fake discriminator CNN.

    Three stride-2 convolutions downsample 32x32 -> 4x4 before the
    sigmoid classifier head.
    """
    model = tf.keras.models.Sequential()
    # normal
    model.add(tf.keras.layers.Conv2D(64, (3, 3), padding='same', input_shape=in_shape))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 16x16
    model.add(tf.keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 8x8
    model.add(tf.keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 4x4
    model.add(tf.keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # classifier
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    # fixed: Adam's `lr` kwarg is deprecated (removed in newer TF 2.x) —
    # use `learning_rate`; same values as before
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
# define the standalone generator model
def defineGen(latent_dim):
    """Build the baseline generator: latent vector -> 32x32x3 image in [-1, 1]."""
    model = tf.keras.models.Sequential()
    # project the latent vector onto a 4x4x128 feature map
    n_nodes = 128 * 4 * 4
    model.add(tf.keras.layers.Dense(n_nodes, input_dim=latent_dim))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(tf.keras.layers.Reshape((4, 4, 128)))
    # three stride-2 transposed convs upsample 4x4 -> 8x8 -> 16x16 -> 32x32
    for _ in range(3):
        model.add(tf.keras.layers.Conv2DTranspose(64, (4, 4), strides=(2, 2), padding='same'))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # tanh output keeps pixels in [-1, 1], matching the data scaling
    model.add(tf.keras.layers.Conv2D(3, (3, 3), activation='tanh', padding='same'))
    return model
# define the combined generator and discriminator model, for updating the generator
def defineGAN(genModel, discModel):
    """Stack generator + frozen discriminator so the generator is updated
    through the discriminator's classification error."""
    # freeze the discriminator inside the combined model; it is still
    # trained directly through its own compiled train_on_batch calls
    discModel.trainable = False
    model = tf.keras.models.Sequential()
    # add generator
    model.add(genModel)
    # add the discriminator
    model.add(discModel)
    # fixed: use `learning_rate` instead of the deprecated `lr` kwarg
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model
# train the generator and discriminator
def trainGAN(genModel, discModel, gan_model, dataset, latentDim, n_epochs=200, n_batch=128,fid_frequency=50, name='model'):
    # Standard GAN alternating-update loop: each step trains the
    # discriminator on half a batch of real plus half a batch of fake
    # images, then trains the generator (through gan_model) on a full batch.
    bat_per_epo = int(dataset.shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    # manually enumerate epochs
    for i in range(n_epochs):
        # enumerate batches over the training set
        for j in range(bat_per_epo):
            # get randomly selected 'real' samples
            X_real, y_real = selectRealSamples(dataset, half_batch)
            # update discriminator model weights
            d_loss1, _ = discModel.train_on_batch(X_real, y_real)
            # generate 'fake' examples
            X_fake, y_fake = generateFakeSamples(genModel, latentDim, half_batch)
            # update discriminator model weights
            d_loss2, _ = discModel.train_on_batch(X_fake, y_fake)
            # prepare points in latent space as input for the generator
            X_gan = generateLatentPoints(latentDim, n_batch)
            # create inverted labels for the fake samples: the generator is
            # rewarded when the discriminator calls its output "real"
            y_gan = np.ones((n_batch, 1))
            # update the generator via the discriminator's error
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            # progress bar per epoch (carriage return keeps one line updating)
            if j%25==0 or j==bat_per_epo-1:
                if j==bat_per_epo-1: #if last step
                    print('>epoch%d, %d/%d, d1=%.3f, d2=%.3f g=%.3f' % (i + 1, j + 1, bat_per_epo, d_loss1, d_loss2, g_loss))
                else:
                    print('>epoch%d, %d/%d, d1=%.3f, d2=%.3f g=%.3f' % (i + 1, j + 1, bat_per_epo, d_loss1, d_loss2, g_loss) , end='\r')
        #compute FID for every interval and last epoch
        if (i>0 and (i+1)%fid_frequency==0) or (i+1)==n_epochs:
            # Wrap the trained generator (gan_model.layers[0]) in a fresh
            # Sequential so computeFID can call predict on it directly
            updated_gen_model = tf.keras.models.Sequential()
            updated_gen_model.add(gan_model.layers[0])
            # Set the weights of the new model to be the same as the generator portion of the gan_model
            updated_gen_model.set_weights(gan_model.layers[0].get_weights())
            computeFID(updated_gen_model,latentDim)
        # evaluate the model performance every 2 epochs
        if (i+1) % 2 == 0:
            summarisePerf(i, genModel, discModel, dataset, latentDim, name=name)
    return
# size of the latent space
latent_dim = 100
# create the discriminator
discModel = defineDisc()
# create the generator
genModel = defineGen(latent_dim)
# create the gan (generator + frozen discriminator)
gan_model = defineGAN(genModel, discModel)
# print architecture summaries for all three models
print('***************DISCRIMINATOR**************')
discModel.summary()
print('***************GENERATOR**************')
genModel.summary()
gan_model.summary()
%%time
# load image data
x_train = loadRealSamples()
# baseline run: 50 epochs, batch 256, FID every 10 epochs
trainGAN(genModel, discModel, gan_model, x_train, latent_dim, n_epochs=50,n_batch=256,fid_frequency=10,name='model1')
Decent for a first attempt, but the model takes quite long to get out of the 'noisy' phase where it only produces random patterns rather than CIFAR-10-like images. There is a lot of room for improvement because the model was running into a lot of mode collapse (the model creating variants of the same image/pattern).
# define the standalone discriminator model
def defineDisc(in_shape=(32, 32, 3)):
    """Build and compile the wider discriminator (128/128/64/64 filters).

    Three stride-2 convolutions downsample 32x32 -> 4x4 before the
    sigmoid classifier head.
    """
    model = tf.keras.models.Sequential()
    # normal
    model.add(tf.keras.layers.Conv2D(128, (3, 3), padding='same', input_shape=in_shape))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 16x16
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 8x8
    model.add(tf.keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 4x4
    model.add(tf.keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # classifier
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    # fixed: use `learning_rate` instead of the deprecated `lr` kwarg
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
# define the standalone generator model
def defineGen(latent_dim):
    """Build the wider generator: latent vector -> 32x32x3 image in [-1, 1]."""
    model = tf.keras.models.Sequential()
    # project the latent vector onto a 4x4x256 feature map
    n_nodes = 256 * 4 * 4
    model.add(tf.keras.layers.Dense(n_nodes, input_dim=latent_dim))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(tf.keras.layers.Reshape((4, 4, 256)))
    # stride-2 transposed convs upsample 4x4 -> 8x8 -> 16x16 -> 32x32
    for filters in (64, 128, 128):
        model.add(tf.keras.layers.Conv2DTranspose(filters, (4, 4), strides=(2, 2), padding='same'))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # tanh output keeps pixels in [-1, 1], matching the data scaling
    model.add(tf.keras.layers.Conv2D(3, (3, 3), activation='tanh', padding='same'))
    return model
# define the combined generator and discriminator model, for updating the generator
def defineGAN(genModel, discModel):
    """Stack generator + frozen discriminator so the generator is updated
    through the discriminator's classification error."""
    # freeze the discriminator inside the combined model; it is still
    # trained directly through its own compiled train_on_batch calls
    discModel.trainable = False
    model = tf.keras.models.Sequential()
    # add generator
    model.add(genModel)
    # add the discriminator
    model.add(discModel)
    # fixed: use `learning_rate` instead of the deprecated `lr` kwarg
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model
# train the generator and discriminator
def trainGAN(genModel, discModel, gan_model, dataset, latentDim, n_epochs=200, n_batch=128,fid_frequency=50,name='model'):
    # Same alternating-update loop as the baseline: discriminator on a
    # half batch of real + half batch of fake, then generator on a full batch.
    bat_per_epo = int(dataset.shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    # manually enumerate epochs
    for i in range(n_epochs):
        for j in range(bat_per_epo):
            # get randomly selected 'real' samples
            X_real, y_real = selectRealSamples(dataset, half_batch)
            # update discriminator model weights
            d_loss1, _ = discModel.train_on_batch(X_real, y_real)
            # generate 'fake' examples
            X_fake, y_fake = generateFakeSamples(genModel, latentDim, half_batch)
            # update discriminator model weights
            d_loss2, _ = discModel.train_on_batch(X_fake, y_fake)
            # prepare points in latent space as input for the generator
            X_gan = generateLatentPoints(latentDim, n_batch)
            # create inverted labels for the fake samples: the generator is
            # rewarded when the discriminator calls its output "real"
            y_gan = np.ones((n_batch, 1))
            # update the generator via the discriminator's error
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            # progress bar per epoch (carriage return keeps one line updating)
            if j%25==0 or j==bat_per_epo-1:
                if j==bat_per_epo-1: #if last step
                    print('>epoch%d, %d/%d, d1=%.3f, d2=%.3f g=%.3f' % (i + 1, j + 1, bat_per_epo, d_loss1, d_loss2, g_loss))
                else:
                    print('>epoch%d, %d/%d, d1=%.3f, d2=%.3f g=%.3f' % (i + 1, j + 1, bat_per_epo, d_loss1, d_loss2, g_loss) , end='\r')
        #compute FID for every interval and last epoch
        if (i>0 and (i+1)%fid_frequency==0) or (i+1)==n_epochs:
            # Wrap the trained generator (gan_model.layers[0]) in a fresh
            # Sequential so computeFID can call predict on it directly
            updated_gen_model = tf.keras.models.Sequential()
            updated_gen_model.add(gan_model.layers[0])
            # Set the weights of the new model to be the same as the generator portion of the gan_model
            updated_gen_model.set_weights(gan_model.layers[0].get_weights())
            computeFID(updated_gen_model,latentDim)
        # evaluate the model performance every 2 epochs
        if (i+1) % 2 == 0:
            summarisePerf(i, genModel, discModel, dataset, latentDim,name=name)
    return
# size of the latent space
latent_dim = 100
# create the discriminator
discModel = defineDisc()
# create the generator
genModel = defineGen(latent_dim)
# create the gan (generator + frozen discriminator)
gan_model = defineGAN(genModel, discModel)
# print architecture summaries for all three models
print('***************DISCRIMINATOR**************')
discModel.summary()
print('***************GENERATOR**************')
genModel.summary()
gan_model.summary()
%%time
# load image data
x_train = loadRealSamples()
# wider model, no batch norm: 100 epochs, batch 64, FID every 10 epochs
trainGAN(genModel, discModel, gan_model, x_train, latent_dim, n_epochs=100,n_batch=64,fid_frequency=10,name='model2NoBN')
FID of ~43 is a great improvement from our baseline, let's see if batch normalisation will improve even further
Batch normalisation is only used in the generator model; applying it in the discriminator as well caused the model to collapse.
# define the standalone discriminator model
def defineDisc(in_shape=(32, 32, 3)):
    """Build and compile the discriminator with dropout regularisation.

    Three stride-2 convolutions downsample 32x32 -> 4x4; dropout before
    the sigmoid head reduces discriminator overfitting.
    """
    model = tf.keras.models.Sequential()
    # normal
    model.add(tf.keras.layers.Conv2D(128, (3, 3), padding='same', input_shape=in_shape))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 16x16
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 8x8
    model.add(tf.keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 4x4
    model.add(tf.keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # classifier
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.4))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    # fixed: use `learning_rate` instead of the deprecated `lr` kwarg
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
# define the standalone generator model
def defineGen(latent_dim):
    """Build the generator with batch normalisation after each learned layer."""
    model = tf.keras.models.Sequential()
    # project the latent vector onto a 4x4x256 feature map
    n_nodes = 256 * 4 * 4
    model.add(tf.keras.layers.Dense(n_nodes, input_dim=latent_dim))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.9))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(tf.keras.layers.Reshape((4, 4, 256)))
    # stride-2 transposed convs upsample 4x4 -> 8x8 -> 16x16 -> 32x32,
    # each followed by batch norm to stabilise generator training
    for filters in (64, 128, 128):
        model.add(tf.keras.layers.Conv2DTranspose(filters, (4, 4), strides=(2, 2), padding='same'))
        model.add(tf.keras.layers.BatchNormalization(momentum=0.9))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # tanh output keeps pixels in [-1, 1], matching the data scaling
    model.add(tf.keras.layers.Conv2D(3, (3, 3), activation='tanh', padding='same'))
    return model
# size of the latent space
latent_dim = 100
# create the discriminator
discModel = defineDisc()
# create the generator
genModel = defineGen(latent_dim)
# create the gan (generator + frozen discriminator)
gan_model = defineGAN(genModel, discModel)
# print architecture summaries for all three models
print('***************DISCRIMINATOR**************')
discModel.summary()
print('***************GENERATOR**************')
genModel.summary()
gan_model.summary()
%%time
# load image data
x_train = loadRealSamples()
# batch-norm generator variant: 100 epochs, batch 64, FID every 10 epochs
trainGAN(genModel, discModel, gan_model, x_train, latent_dim, n_epochs=100,n_batch=64,fid_frequency=10,name='model2BN')
We will do light augmentation because too much augmentation can cause the generator to learn the distribution of augmented data instead of real data
def createImageGen():
    """Return a Keras ImageDataGenerator configured with light augmentation."""
    # mild rotations/shifts/zoom plus horizontal flips only, so the
    # generator does not end up learning an augmented distribution
    return tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.15,
        height_shift_range=0.15,
        zoom_range=0.2,
        horizontal_flip=True,
    )
#function to reverse scaling/preprocessing steps, for data visualisation purposes
def reverseProcess(X):
    """Map images from [-1, 1] floats back to [0, 255] uint8 pixel values."""
    return ((X * 127.5) + 127.5).astype('uint8')
#visualise some examples
datagen=createImageGen()
x_train=loadRealSamples()
# take the first five images and one augmented variant of each
images=x_train[0:5]
augImages= datagen.flow(images, shuffle=False).next()
fig=plt.figure(figsize=(8,8))
fig.subplots_adjust(wspace=0.1)
count=1
# left column: original image; right column: its augmented counterpart
for img,augImg in zip(reverseProcess(images),reverseProcess(augImages)):
    plt.subplot(5,2,count)
    plt.axis('off')
    plt.imshow(img)
    count+=1
    plt.subplot(5,2,count)
    plt.axis('off')
    plt.imshow(augImg )
    count+=1
plt.suptitle('Original Augmented',fontsize=18)
plt.show()
Now when the code displays the generated images, the seed of the latent points will be the same. This way we can more easily see the progress of the model, as it tries to generate the same images every time (this does not affect the actual training of the model, only the visualisation).
import random #use python random library for fixed seeds
def generateFixedLatents(latentDim, n_samples):
    """Sample a deterministic batch of latent vectors (seed fixed to 0)."""
    # reseed on every call so identical latent points are produced each
    # time; used only for visualisation, never for training
    random.seed(0) #fixed seed
    values = [random.gauss(0, 1) for _ in range(latentDim * n_samples)]
    return np.array(values).reshape(n_samples, latentDim)
# use the generator to generate n fake examples, with class labels
def generateFixedSamples(genModel, latentDim, n_samples):
    """Generate images from the fixed latent batch, labelled 'fake' (all 0s)."""
    latents = generateFixedLatents(latentDim, n_samples)
    # every generated image is labelled fake (0) for the discriminator
    return genModel.predict(latents), np.zeros((n_samples, 1))
# evaluate the discriminator, plot generated images, save generator model
def summarisePerf2(epoch, genModel, discModel, dataset, latentDim, name='model', n_samples=150):
    """Like summarisePerf, but plots fakes from a fixed latent seed so
    progress is visually comparable across epochs; plots save every epoch."""
    # score the discriminator on a batch of real images
    X_real, y_real = selectRealSamples(dataset, n_samples)
    _, acc_real = discModel.evaluate(X_real, y_real, verbose=0)
    # fixed-seed fakes: the same latent points are used on every call
    x_fake, y_fake = generateFixedSamples(genModel, latentDim, n_samples)
    _, acc_fake = discModel.evaluate(x_fake, y_fake, verbose=0)
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real * 100, acc_fake * 100))
    # save a plot on every epoch (save_frequency=1)
    savePlot(x_fake, epoch, name, save_frequency=1)
Change positive labels (1) to a smooth range, this is known as soft label smoothing. This approach helps to improve the model's generalization ability by making it more robust to noise in the target labels.
def smooth_positive_labels(y):
    """Smooth hard 1 labels into the range [0.9, 1.0) (one-sided label smoothing)."""
    noise = np.random.random(y.shape) * 0.1
    return (y - 0.1) + noise
Gaussian weight initialization is used to initialize the weights of layers with random values from a normal (Gaussian) distribution. This helps to prevent the model from getting stuck in poor local minima, leading to better convergence during training
def gaussian_weight_init(shape, dtype=None):
    """Kernel initializer: draw weights from a normal distribution N(0, 0.1)."""
    return K.random_normal(shape, mean=0.0, stddev=0.1, dtype=dtype)
# define the standalone discriminator model
def defineDisc(in_shape=(32, 32, 3)):
    """Build and compile the discriminator with Gaussian weight init and dropout.

    Four 128-filter conv layers (three with stride 2 downsampling 32x32 ->
    4x4), then dropout and a sigmoid head.
    """
    model = tf.keras.models.Sequential()
    # normal
    model.add(tf.keras.layers.Conv2D(128, (3, 3), padding='same', input_shape=in_shape, kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 16x16
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 8x8
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample to 4x4
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # classifier
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.4))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    # fixed: use `learning_rate` instead of the deprecated `lr` kwarg
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
# define the standalone generator model
def defineGen(latent_dim):
    """Build the generator with batch norm and Gaussian weight initialisation."""
    model = tf.keras.models.Sequential()
    # project the latent vector onto a 4x4x256 feature map
    n_nodes = 256 * 4 * 4
    model.add(tf.keras.layers.Dense(n_nodes, input_dim=latent_dim))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.9))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(tf.keras.layers.Reshape((4, 4, 256)))
    # stride-2 transposed convs upsample 4x4 -> 8x8 -> 16x16 -> 32x32,
    # each with Gaussian-initialised kernels and batch normalisation
    for filters in (128, 128, 64):
        model.add(tf.keras.layers.Conv2DTranspose(filters, (4, 4), strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
        model.add(tf.keras.layers.BatchNormalization(momentum=0.9))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # tanh output keeps pixels in [-1, 1], matching the data scaling
    model.add(tf.keras.layers.Conv2D(3, (3, 3), activation='tanh', padding='same'))
    return model
#Store losses in list
# module-level history: per-epoch discriminator losses on real (d1) and
# fake (d2) batches, and generator loss (g); appended to by trainGAN
d1Losses=[]
d2Losses=[]
gLosses=[]
During training we can change the learning rate to become lower based on the epoch count. This allows for more fine grained learning nearer to the end. Like taking smaller steps down the mountain the lower we go during gradient descent.
#Learning rate schedulers
def schedulerDisc(epoch):
    """Step-decay learning-rate schedule for the discriminator."""
    # (epoch upper bound, learning rate) steps, checked in order
    for bound, rate in ((30, 0.00015), (60, 0.00012), (120, 0.0001)):
        if epoch < bound:
            return rate
    return 0.00007
def schedulerGen(epoch):
    """Step-decay learning-rate schedule for the generator (via gan_model)."""
    # (epoch upper bound, learning rate) steps, checked in order
    for bound, rate in ((30, 0.0002), (60, 0.00018), (120, 0.00015)):
        if epoch < bound:
            return rate
    return 0.0001
# define the combined generator and discriminator model, for updating the generator
def defineGAN(genModel, discModel):
    """Stack generator + frozen discriminator so the generator is updated
    through the discriminator's classification error."""
    # freeze the discriminator inside the combined model; it is still
    # trained directly through its own compiled train_on_batch calls
    discModel.trainable = False
    model = tf.keras.models.Sequential()
    # add generator
    model.add(genModel)
    # add the discriminator
    model.add(discModel)
    # fixed: use `learning_rate` instead of the deprecated `lr` kwarg
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model
# train the generator and discriminator
def trainGAN(genModel, discModel, gan_model, dataset, latentDim, n_epochs=200, n_batch=128, fid_frequency=50, sumFrequency=2, name='model'):
    """Train with augmented reals, smoothed positive labels and LR schedules.

    Tracks FID every fid_frequency epochs, saving the generator to
    models/model3.h5 whenever the FID improves, and appends per-epoch
    losses to the module-level d1Losses/d2Losses/gLosses lists.

    Returns [d1Losses, d2Losses, gLosses].
    """
    bat_per_epo = int(dataset.shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    # fixed: start from +inf instead of an arbitrary 200 so the first FID
    # measurement is always recorded as the best model
    bestFID = float('inf')
    # augmented real-image stream (fixed: renamed from `iter`, which
    # shadowed the builtin)
    aug_iter = datagen.flow(dataset, y=None, batch_size=half_batch)
    for i in range(n_epochs):
        # set this epoch's learning rates from the schedulers
        K.set_value(discModel.optimizer.learning_rate, schedulerDisc(i))
        K.set_value(gan_model.optimizer.learning_rate, schedulerGen(i))
        # enumerate batches over the training set
        for j in range(bat_per_epo):
            # augmented real half-batch with smoothed positive labels
            X_real = aug_iter.next()
            y_real = smooth_positive_labels(np.ones((len(X_real), 1)))
            # update discriminator on reals, then on generated fakes
            d_loss1, _ = discModel.train_on_batch(X_real, y_real)
            X_fake, y_fake = generateFakeSamples(genModel, latentDim, half_batch)
            d_loss2, _ = discModel.train_on_batch(X_fake, y_fake)
            # generator update: inverted (real) labels through the frozen disc
            X_gan = generateLatentPoints(latentDim, n_batch)
            y_gan = np.ones((n_batch, 1))
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            # progress line every 25 steps and on the final step
            if j % 25 == 0 or j == bat_per_epo - 1:
                d_lr = discModel.optimizer.learning_rate.numpy()
                g_lr = gan_model.optimizer.learning_rate.numpy()
                # round each learning rate to 2 significant figures for display
                dLR_sf = round(d_lr / 10 ** math.floor(math.log10(d_lr)), 1) * 10 ** math.floor(math.log10(d_lr))
                gLR_sf = round(g_lr / 10 ** math.floor(math.log10(g_lr)), 1) * 10 ** math.floor(math.log10(g_lr))
                if j == bat_per_epo - 1:  # last step: record losses, keep the line
                    d1Losses.append(d_loss1)
                    d2Losses.append(d_loss2)
                    gLosses.append(g_loss)
                    print(f'>epoch{i+1}, {j+1}/{bat_per_epo}, dLR:{dLR_sf}, gLR:{gLR_sf}, d1={d_loss1:.3f}, d2={d_loss2:.3f}, g={g_loss:.3f}')
                else:
                    print(f'>epoch{i+1}, {j+1}/{bat_per_epo}, dLR:{dLR_sf}, gLR:{gLR_sf}, d1={d_loss1:.3f}, d2={d_loss2:.3f}, g={g_loss:.3f}',
                          end='\r')
        # compute FID at every fid_frequency interval and on the last epoch
        if (i > 0 and (i + 1) % fid_frequency == 0) or (i + 1) == n_epochs:
            # wrap the trained generator so computeFID can call predict on it
            updated_gen_model = tf.keras.models.Sequential()
            updated_gen_model.add(gan_model.layers[0])
            updated_gen_model.set_weights(gan_model.layers[0].get_weights())
            FID = computeFID(updated_gen_model, latentDim)
            if FID < bestFID:
                print('saving best model')
                bestFID = FID
                # fixed: reuse the wrapper built above instead of rebuilding
                # an identical model just to save it
                updated_gen_model.save('models/model3.h5')
        # evaluate the model performance every sumFrequency epochs
        # (fixed: sumFrequency was accepted but never used, and name was
        # never forwarded, so plots always landed in imgs/model/)
        if (i + 1) % sumFrequency == 0:
            summarisePerf2(i, genModel, discModel, dataset, latentDim, name=name)
    # return losses lists
    return [d1Losses, d2Losses, gLosses]
# size of the latent space (increased from 100 to 128 for this model)
latent_dim = 128
# create the discriminator
discModel = defineDisc()
# create the generator
genModel = defineGen(latent_dim)
# create the gan (generator + frozen discriminator)
gan_model = defineGAN(genModel, discModel)
# print architecture summaries for all three models
print('***************DISCRIMINATOR**************')
discModel.summary()
print('***************GENERATOR**************')
genModel.summary()
gan_model.summary()
%%time
# load image data
x_train = loadRealSamples()
# final run: 200 epochs, batch 64, FID every 5 epochs, summaries every epoch
d1Loss,d2Loss,gLoss=trainGAN(genModel, discModel, gan_model, x_train,
latent_dim, n_epochs=200,n_batch=64,fid_frequency=5,sumFrequency=1,name='model3')
# plot the recorded per-epoch losses for model 3
epochs=range(len(d1Loss))
plt.figure(figsize=(15,10))
plt.plot(epochs,d1Loss)
plt.plot(epochs,d2Loss)
plt.plot(epochs,gLoss)
plt.title('Model 3 Training')
plt.ylabel('Loss')
plt.xlabel('epoch')
plt.legend(['d1','d2','g'],loc='upper left')
plt.show()
Previously during training we calculated FID against the 10K test-set images. This is efficient because it is fast to compute, but it is not the most accurate way to calculate FID. We shall now calculate it against the full set of 50K + 10K = 60K images, since that is what we trained on.
#load the best checkpoint saved during training (custom initializer must be
#passed so Keras can deserialize the conv layers)
bestModel = tf.keras.models.load_model('models/model3.h5', custom_objects={'gaussian_weight_init': gaussian_weight_init})
bestModel.summary()
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
# reset_real_features=False keeps the real-image statistics cached across
# fid.reset() calls, so the real pass below only has to run once
fid = FrechetInceptionDistance(reset_real_features=False).to('cuda')
fid.reset()
real_images = loadRealSamples()
real_images = real_images.astype(np.float32)
real_images = torch.from_numpy(np.array(real_images))
#change dtypes, scale back first: undo tanh scaling [-1, 1] -> [0, 255] uint8,
#which is what torchmetrics' FID expects
real_images = (real_images + 1) * (255 / 2)
real_images = real_images.type(torch.uint8)
#from NxHxWxC (Keras layout) to NxCxHxW (torch layout)
real_images = real_images.permute(0, 3, 1, 2)
real_images = real_images.to('cuda')
#COMPUTE FID
# create a dataloader with batch size 128
dataloader = torch.utils.data.DataLoader(real_images, batch_size=128, shuffle=True,
                                         drop_last=True
                                         )
# update the FID with real images (one-off; real features are cached)
for batch, images in enumerate(dataloader):
    print(f'Updating with real images now...Batch{batch}', end='\r')
    fid.update(images, real=True)
print(f'\nFinished! Ready to take in generated image features')
def computeFID(generator, latentDim):
    """Score `generator` against the cached real-image FID statistics.

    Draws 60,000 fake samples, converts them to uint8 channels-first tensors
    on the GPU, feeds them through the shared torchmetrics FID object in
    batches of 128, and returns the FID score (lower is better).
    """
    fid.reset()
    # generateFakeSamples returns (images, labels); keep only the images,
    # which are in the tanh range [-1, 1].
    fake_batch = generateFakeSamples(generator, latentDim, 60000)[0]
    fake_batch = fake_batch.astype(np.float32)
    fake_tensor = torch.from_numpy(np.array(fake_batch))
    # Undo the tanh scaling ([-1, 1] -> [0, 255] uint8), then NHWC -> NCHW
    # and onto the GPU, matching what the metric expects.
    fake_tensor = ((fake_tensor + 1) * (255 / 2)).type(torch.uint8)
    fake_tensor = fake_tensor.permute(0, 3, 1, 2).to('cuda')
    loader = torch.utils.data.DataLoader(fake_tensor, batch_size=128, shuffle=True,
                                         drop_last=True)
    print('')
    # Fold every generated batch into the metric state.
    for batch_idx, images in enumerate(loader):
        print(f'Updating FID with generated images: Batch{batch_idx}', end='\r')
        fid.update(images, real=False)
    print('\n')
    score = fid.compute()
    print(f'FID: {score.item()}')
    return score
# Evaluate the reloaded best generator on the full 60K-image FID.
# FIX: model3 was built with latent_dim = 128 (see defineGen(latent_dim)
# above); the previous hard-coded 100 did not match the generator's input size.
FID = computeFID(bestModel, latent_dim)
Our FID had a nice improvement to 32. From the loss graph we can see that near the end of training generator loss was slightly increasing but FID was still remaining stable/improving. So learning was still taking place at that time.
Our DCGAN gave good performance but in a real life scenario being able to generate a random image from such a broad dataset like CIFAR10 is not very useful. If we can specify which class we want to generate like a car or a plane, that will become very useful. Conditional DCGAN is a variant of DCGANs that generates new data samples based on a given condition, such as in this case, class labels. They are more challenging to train because the generator has to take into account the additional condition and generate samples that conform to it. Additionally, the discriminator has to be able to distinguish between real and fake samples with respect to the condition, which increases its complexity and the difficulty of training. However, the benefits of being able to generate class-conditioned samples often outweigh the additional challenges.
# !pip install -q git+https://github.com/tensorflow/docs
# !pip install imageio
from tensorflow.keras import layers
from tensorflow_docs.vis import embed
import imageio
from tensorflow import keras
# Conditional-GAN hyperparameters for CIFAR-10
batch_size = 64
num_channels = 3
num_classes = 10
image_size = 32
latent_dim = 128
# images in [-1, 1] plus their class labels
all_digits = loadRealSamples()
all_labels = loadRealLabels()
print(all_digits.shape)
print(all_labels.shape)
# Create tf.data.Dataset.
dataset = tf.data.Dataset.from_tensor_slices((all_digits, all_labels))
dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)
# Generator input = latent vector concatenated with a one-hot label;
# discriminator input = image channels plus per-pixel one-hot label planes.
generator_in_channels = latent_dim + num_classes
discriminator_in_channels = num_channels + num_classes
print(generator_in_channels, discriminator_in_channels)
# Create the discriminator: a 32x32 image stacked with its one-hot label
# planes is repeatedly downsampled, flattened and mapped to one logit.
discriminator = tf.keras.Sequential(name="discriminator")
discriminator.add(layers.InputLayer((32, 32, discriminator_in_channels)))
discriminator.add(layers.Conv2D(64, (5, 5), strides=2, padding="same"))
discriminator.add(layers.LeakyReLU(alpha=0.1))
discriminator.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding="same"))
discriminator.add(layers.LeakyReLU(alpha=0.1))
discriminator.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding="same"))
discriminator.add(layers.LeakyReLU(alpha=0.1))
discriminator.add(layers.Conv2D(256, (5, 5), strides=(2, 2), padding="same"))
discriminator.add(layers.LeakyReLU(alpha=0.1))
discriminator.add(layers.Flatten())
discriminator.add(layers.Dense(256, activation='relu'))
discriminator.add(layers.Dropout(0.3))
# single logit output; BinaryCrossentropy(from_logits=True) is used below
discriminator.add(layers.Dense(1))
# Create the generator.
generator = keras.Sequential(
    [
        layers.InputLayer((generator_in_channels,)),
        # Project the (latent + one-hot label) vector into a 4x4x256 feature
        # map, then upsample 4 -> 8 -> 16 -> 32 with transposed convolutions.
        layers.Dense(4 * 4 * 256),
        # layers.BatchNormalization(momentum=0.9),
        layers.LeakyReLU(alpha=0.1),
        layers.Reshape((4, 4, 256)),
        layers.Conv2DTranspose(256, (5, 5), strides=(2, 2), padding="same"),
        # layers.BatchNormalization(momentum=0.9),
        layers.LeakyReLU(alpha=0.1),
        layers.Conv2DTranspose(128, (5, 5), strides=(2, 2), padding="same"),
        # layers.BatchNormalization(momentum=0.9),
        layers.LeakyReLU(alpha=0.1),
        layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding="same"),
        # layers.BatchNormalization(momentum=0.9),
        layers.LeakyReLU(alpha=0.1),
        # tanh output keeps pixels in [-1, 1], matching the training data
        layers.Conv2D(3, (4, 4), padding="same", activation="tanh"),
    ],
    name="generator",
)
discriminator.summary()
generator.summary()
#Conditional GAN class, inherit from keras Model class
class ConditionalGAN(keras.Model):
    """Conditional GAN trained on (image, one-hot label) batches.

    Each train_step performs one discriminator update on a mixed real/fake
    batch, then one generator update through the (non-updated) discriminator.
    """

    def __init__(self, discriminator, generator, latent_dim):
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        # Running means of the two losses, reported by fit() each epoch.
        self.gen_loss_tracker = keras.metrics.Mean(name="generator_loss")
        self.disc_loss_tracker = keras.metrics.Mean(name="discriminator_loss")

    @property
    def metrics(self):
        # Listing the trackers here lets Keras reset them between epochs.
        return [self.gen_loss_tracker, self.disc_loss_tracker]

    def compile(self, d_optimizer, g_optimizer, loss_fn):
        """Store the two optimizers and the shared (from-logits) BCE loss."""
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.loss_fn = loss_fn

    def train_step(self, data):
        # Unpack the data.
        real_images, one_hot_labels = data
        # Broadcast each one-hot label into a 32x32 plane per class so it can
        # be concatenated onto the image channels for the discriminator.
        image_one_hot_labels = one_hot_labels[:, :, None, None]
        image_one_hot_labels = tf.repeat(
            image_one_hot_labels, repeats=[image_size * image_size]
        )
        image_one_hot_labels = tf.reshape(
            image_one_hot_labels, (-1, image_size, image_size, num_classes)
        )
        # Sample random points in the latent space and concatenate labels
        batch_size = tf.shape(real_images)[0]
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))
        random_vector_labels = tf.concat(
            [random_latent_vectors, one_hot_labels], axis=1
        )
        # Decode the noise (guided by labels) to fake images.
        generated_images = self.generator(random_vector_labels)
        # Combine them with real images
        fake_image_and_labels = tf.concat([generated_images, image_one_hot_labels], -1)
        real_image_and_labels = tf.concat([real_images, image_one_hot_labels], -1)
        combined_images = tf.concat(
            [fake_image_and_labels, real_image_and_labels], axis=0
        )
        # Assemble labels discriminating real from fake images.
        # NOTE: fakes are labelled 1 and reals 0 here; the generator target
        # below (zeros, i.e. "real") is consistent with that convention.
        labels = tf.concat(
            [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0
        )
        # Train the discriminator.
        with tf.GradientTape() as tape:
            predictions = self.discriminator(combined_images)
            d_loss = self.loss_fn(labels, predictions)
        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optimizer.apply_gradients(
            zip(grads, self.discriminator.trainable_weights)
        )
        # Sample fresh random points in the latent space for the generator step.
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))
        random_vector_labels = tf.concat(
            [random_latent_vectors, one_hot_labels], axis=1
        )
        # Assemble labels that say "all real images".
        misleading_labels = tf.zeros((batch_size, 1))
        # Train the generator (only generator weights receive gradients here).
        with tf.GradientTape() as tape:
            fake_images = self.generator(random_vector_labels)
            fake_image_and_labels = tf.concat([fake_images, image_one_hot_labels], -1)
            predictions = self.discriminator(fake_image_and_labels)
            g_loss = self.loss_fn(misleading_labels, predictions)
        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))
        # Monitor loss.
        self.gen_loss_tracker.update_state(g_loss)
        self.disc_loss_tracker.update_state(d_loss)
        return {
            "g_loss": self.gen_loss_tracker.result(),
            "d_loss": self.disc_loss_tracker.result(),
        }
# Instantiate, compile and train the conditional GAN.
cond_gan = ConditionalGAN(
    discriminator=discriminator, generator=generator, latent_dim=latent_dim
)
cond_gan.compile(
    d_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    g_optimizer=keras.optimizers.Adam(learning_rate=0.0002),
    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
)
cond_gan.fit(dataset, epochs=200)
#save model (generator only — the discriminator is not needed for sampling)
cond_gan.generator.save('models/cgan.h5')
gen = cond_gan.generator
# CIFAR-10 class names, index-aligned with the one-hot label positions
class_names = ["airplane", "automobile", "bird", "cat", "deer",
               "dog", "frog", "horse", "ship", "truck"]
# Generate 100 images in total: a 10x10 grid, one row per class.
num_images = 100
num_classes = 10
plt.figure(figsize=(15, 15))
# Generate 10 random latent vectors for each class
random_latent_vectors = tf.random.normal(shape=(num_images, 128))
# Generate one-hot labels for each class
# (rows 0-9 -> class 0, rows 10-19 -> class 1, ...)
one_hot_labels = np.zeros((num_images, num_classes))
for i in range(num_classes):
    start_idx = i * 10
    end_idx = start_idx + 10
    one_hot_labels[start_idx:end_idx, i] = 1
# Concatenate the random latent vectors and one-hot labels
random_vector_labels = tf.concat([random_latent_vectors, one_hot_labels], axis=1)
# Generate images using the CGAN model
imgs = gen.predict(random_vector_labels)
# Scale the images from [-1, 1] to [0, 1]
imgs = (imgs + 1) / 2.0
# Plot the generated images; the first image of each row is labelled with its
# class name, the rest have their axes hidden.
rowCount = 0
for i in range(num_images):
    # Define the subplot
    plt.subplot(num_images // 10, 10, 1 + i)
    # Turn off axis
    # Plot raw pixel data
    if i % 10 == 0:
        plt.ylabel(class_names[rowCount], size=10)
        rowCount += 1
    else:
        plt.axis('off')
    plt.imshow(imgs[i])
plt.savefig('CGAN.png')
plt.show()
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
# reset_real_features=False caches real statistics across fid.reset() calls
fid = FrechetInceptionDistance(reset_real_features=False).to('cuda')
fid.reset()
real_images = loadRealSamples()
real_images = real_images.astype(np.float32)
real_images = torch.from_numpy(np.array(real_images))
#change dtypes, scale back first: undo tanh scaling [-1, 1] -> [0, 255] uint8
real_images = (real_images + 1) * (255 / 2)
real_images = real_images.type(torch.uint8)
#from NxHxWxC (Keras layout) to NxCxHxW (torch layout)
real_images = real_images.permute(0, 3, 1, 2)
real_images = real_images.to('cuda')
#COMPUTE FID
# create a dataloader with batch size 128
dataloader = torch.utils.data.DataLoader(real_images, batch_size=128, shuffle=True,
                                         drop_last=True
                                         )
# update the FID with real images (one-off; real features are cached)
for batch, images in enumerate(dataloader):
    print(f'Updating with real images now...Batch{batch}', end='\r')
    fid.update(images, real=True)
print(f'\nFinished! Ready to take in generated image features')
def computeFID(generator, latentDim):
    """FID of the conditional generator over 60K class-conditioned samples.

    Generates 6,000 images per class (classes cycled 0..9), converts them to
    uint8 channels-first tensors and folds them into the shared torchmetrics
    FID object, which already holds the real-image statistics.
    """
    fid.reset()
    #PREPARE DATA
    random_latent_vectors = tf.random.normal(shape=(60000, 128))
    # BUG FIX: tf.one_hot over tf.range(0, 60000) with depth=10 produced
    # all-zero vectors for every index >= 10, so 59,990 of the 60,000 samples
    # were generated with no valid class condition. Cycle through the 10
    # classes instead so every latent vector carries a real one-hot label.
    one_hot_labels = tf.one_hot(tf.range(0, 60000) % 10, depth=10)
    random_vector_labels = tf.concat(
        [random_latent_vectors, one_hot_labels], axis=1
    )
    gen_images = generator.predict(random_vector_labels)
    gen_images = gen_images.astype(np.float32)
    gen_images = torch.from_numpy(np.array(gen_images))
    # Undo tanh scaling: [-1, 1] -> [0, 255] uint8.
    gen_images = (gen_images + 1) * (255 / 2)
    gen_images = gen_images.type(torch.uint8)
    # NHWC -> NCHW for torchmetrics, then onto the GPU.
    gen_images = gen_images.permute(0, 3, 1, 2)
    gen_images = gen_images.to('cuda')
    # create a dataloader with batch size 128
    dataloader_gen = torch.utils.data.DataLoader(gen_images, batch_size=128, shuffle=True,
                                                 drop_last=True)
    print('')
    # update the FID with generated images
    for batch, images in enumerate(dataloader_gen):
        print(f'Updating FID with generated images: Batch{batch}', end='\r')
        fid.update(images, real=False)
    print('\n')
    score = fid.compute()
    print(f'FID: {score.item()}')
    return score
# Score the conditional generator (latent dim 128) against the 60K real set.
computeFID(gen, 128)
print('')
WGAN (Wasserstein Generative Adversarial Network) uses the Wasserstein distance metric to evaluate the performance of the generator. Using WGAN over regular DCGAN (Deep Convolutional GAN) can give us better potential in our model as it provides a more stable training process and produces higher-quality generated images. To implement this architecture, we must overhaul our entire training loop/architecture
# WGAN-GP configuration for CIFAR-10
IMG_SHAPE = (32, 32, 3)
BATCH_SIZE = 512
# Size of the noise vector
noise_dim = 128
# CIFAR-10 samples are already (32, 32, 3) and scaled into the [-1, 1] range
train_images = loadRealSamples()
def conv_block(
    x,
    filters,
    activation,
    kernel_size=(3, 3),
    strides=(1, 1),
    padding="same",
    use_bias=True,
    use_bn=False,
    use_dropout=False,
    drop_value=0.5,
):
    """Conv2D -> optional BatchNorm -> activation -> optional Dropout."""
    conv = layers.Conv2D(
        filters, kernel_size, strides=strides, padding=padding, use_bias=use_bias
    )
    out = conv(x)
    if use_bn:
        out = layers.BatchNormalization()(out)
    out = activation(out)
    if use_dropout:
        out = layers.Dropout(drop_value)(out)
    return out
def get_discriminator_model():
    """Functional WGAN critic: 32x32x3 image -> single unbounded score.

    use_bn=False throughout (batch norm is conventionally avoided in WGAN-GP
    critics) and no final sigmoid — the output is a raw Wasserstein score.
    """
    img_input = layers.Input(shape=IMG_SHAPE)
    # Zero pad the 32x32x3 input up to 36x36x3 before the strided conv stages.
    x = layers.ZeroPadding2D((2, 2))(img_input)
    x = conv_block(
        x,
        64,
        kernel_size=(5, 5),
        strides=(2, 2),
        use_bn=False,
        use_bias=True,
        activation=layers.LeakyReLU(0.2),
        use_dropout=False,
        drop_value=0.3,
    )
    x = conv_block(
        x,
        128,
        kernel_size=(5, 5),
        strides=(2, 2),
        use_bn=False,
        activation=layers.LeakyReLU(0.2),
        use_bias=True,
        use_dropout=True,
        drop_value=0.3,
    )
    x = conv_block(
        x,
        256,
        kernel_size=(5, 5),
        strides=(2, 2),
        use_bn=False,
        activation=layers.LeakyReLU(0.2),
        use_bias=True,
        use_dropout=True,
        drop_value=0.3,
    )
    x = conv_block(
        x,
        512,
        kernel_size=(5, 5),
        strides=(2, 2),
        use_bn=False,
        activation=layers.LeakyReLU(0.2),
        use_bias=True,
        use_dropout=False,
        drop_value=0.3,
    )
    x = layers.Flatten()(x)
    x = layers.Dropout(0.2)(x)
    # single unbounded critic score
    x = layers.Dense(1)(x)
    d_model = keras.models.Model(img_input, x, name="discriminator")
    return d_model
d_model = get_discriminator_model()
d_model.summary()
def upsample_block(
    x,
    filters,
    activation,
    kernel_size=(3, 3),
    strides=(1, 1),
    up_size=(2, 2),
    padding="same",
    use_bn=False,
    use_bias=True,
    use_dropout=False,
    drop_value=0.3,
):
    """UpSampling2D -> Conv2D -> optional BN -> optional activation -> optional Dropout."""
    out = layers.UpSampling2D(up_size)(x)
    out = layers.Conv2D(
        filters, kernel_size, strides=strides, padding=padding, use_bias=use_bias
    )(out)
    if use_bn:
        out = layers.BatchNormalization()(out)
    # activation may be None/falsy, in which case it is skipped
    if activation:
        out = activation(out)
    if use_dropout:
        out = layers.Dropout(drop_value)(out)
    return out
def get_generator_model():
    """Functional WGAN generator: noise_dim vector -> 32x32x3 tanh image."""
    noise = layers.Input(shape=(noise_dim,))
    # project and reshape the noise to a 4x4x256 feature map
    x = layers.Dense(4 * 4 * 256, use_bias=False)(noise)
    x = layers.BatchNormalization()(x)
    x = layers.LeakyReLU(0.2)(x)
    x = layers.Reshape((4, 4, 256))(x)
    # three upsampling stages: 4 -> 8 -> 16 -> 32
    x = upsample_block(
        x,
        128,
        layers.LeakyReLU(0.2),
        strides=(1, 1),
        use_bias=False,
        use_bn=True,
        padding="same",
        use_dropout=False,
    )
    x = upsample_block(
        x,
        64,
        layers.LeakyReLU(0.2),
        strides=(1, 1),
        use_bias=False,
        use_bn=True,
        padding="same",
        use_dropout=False,
    )
    # final stage maps to 3 channels with tanh, keeping pixels in [-1, 1]
    x = upsample_block(
        x, 3, layers.Activation("tanh"), strides=(1, 1), use_bias=False, use_bn=True
    )
    # # At this point, we have an output which has the same shape as the input, (32, 32, 1).
    # # We will use a Cropping2D layer to make it (28, 28, 1).
    # x = layers.Cropping2D((2, 2))(x)
    g_model = keras.models.Model(noise, x, name="generator")
    return g_model
g_model = get_generator_model()
g_model.summary()
class WGAN(tf.keras.Model):
    """WGAN with gradient penalty (WGAN-GP, Gulrajani et al., 2017).

    Each train_step runs `discriminator_extra_steps` critic updates followed
    by one generator update.
    """

    def __init__(
        self,
        discriminator,
        generator,
        latent_dim,
        discriminator_extra_steps=3,
        gp_weight=10.0,
    ):
        super().__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        # critic updates per generator update (paper recommends ~5)
        self.d_steps = discriminator_extra_steps
        # gradient-penalty coefficient (lambda in the paper)
        self.gp_weight = gp_weight

    def compile(self, d_optimizer, g_optimizer, d_loss_fn, g_loss_fn):
        """Store the optimizers and the Wasserstein loss functions."""
        super().compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.d_loss_fn = d_loss_fn
        self.g_loss_fn = g_loss_fn

    def gradient_penalty(self, batch_size, real_images, fake_images):
        """Calculates the gradient penalty.

        This loss is calculated on an interpolated image and added to the
        discriminator loss.
        """
        # BUG FIX: the interpolation coefficient must be uniform on [0, 1]
        # (WGAN-GP, Algorithm 1). tf.random.normal produced values outside
        # [0, 1], i.e. extrapolations rather than interpolations between
        # real and fake samples.
        alpha = tf.random.uniform([batch_size, 1, 1, 1], 0.0, 1.0)
        diff = fake_images - real_images
        interpolated = real_images + alpha * diff
        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            # 1. Get the discriminator output for this interpolated image.
            pred = self.discriminator(interpolated, training=True)
        # 2. Calculate the gradients w.r.t to this interpolated image.
        grads = gp_tape.gradient(pred, [interpolated])[0]
        # 3. Penalise deviation of the per-sample gradient norm from 1.
        norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        gp = tf.reduce_mean((norm - 1.0) ** 2)
        return gp

    def train_step(self, real_images):
        if isinstance(real_images, tuple):
            real_images = real_images[0]
        # Get the batch size
        batch_size = tf.shape(real_images)[0]
        # Train the critic for `d_steps` steps per generator step. The
        # original paper recommends ~5; 3 is used here to reduce train time.
        for i in range(self.d_steps):
            # Get the latent vector
            random_latent_vectors = tf.random.normal(
                shape=(batch_size, self.latent_dim)
            )
            with tf.GradientTape() as tape:
                # Generate fake images from the latent vector
                fake_images = self.generator(random_latent_vectors, training=True)
                # Get the logits for the fake and real images
                fake_logits = self.discriminator(fake_images, training=True)
                real_logits = self.discriminator(real_images, training=True)
                # Wasserstein critic loss plus weighted gradient penalty
                d_cost = self.d_loss_fn(real_img=real_logits, fake_img=fake_logits)
                gp = self.gradient_penalty(batch_size, real_images, fake_images)
                d_loss = d_cost + gp * self.gp_weight
            # Update the critic weights
            d_gradient = tape.gradient(d_loss, self.discriminator.trainable_variables)
            self.d_optimizer.apply_gradients(
                zip(d_gradient, self.discriminator.trainable_variables)
            )
        # Train the generator for a single step
        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))
        with tf.GradientTape() as tape:
            generated_images = self.generator(random_latent_vectors, training=True)
            gen_img_logits = self.discriminator(generated_images, training=True)
            g_loss = self.g_loss_fn(gen_img_logits)
        gen_gradient = tape.gradient(g_loss, self.generator.trainable_variables)
        self.g_optimizer.apply_gradients(
            zip(gen_gradient, self.generator.trainable_variables)
        )
        return {"d_loss": d_loss, "g_loss": g_loss}
class GANMonitor(keras.callbacks.Callback):
    """Callback that saves `num_img` generated sample images after each epoch."""

    def __init__(self, num_img=6, latent_dim=128):
        # BUG FIX: the Callback base-class initialiser was never invoked,
        # leaving its bookkeeping attributes unset.
        super().__init__()
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_images = self.model.generator(random_latent_vectors)
        # rescale from tanh range [-1, 1] to pixel range [0, 255]
        generated_images = (generated_images * 127.5) + 127.5
        for i in range(self.num_img):
            img = generated_images[i].numpy()
            img = keras.preprocessing.image.array_to_img(img)
            img.save("generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch))
# Instantiate the optimizer for both networks
# (learning_rate=0.0002, beta_1=0.5 are recommended)
generator_optimizer = keras.optimizers.Adam(
    learning_rate=0.0002, beta_1=0.5, beta_2=0.9
)
discriminator_optimizer = keras.optimizers.Adam(
    learning_rate=0.0002, beta_1=0.5, beta_2=0.9
)
# Define the loss functions for the discriminator,
# which should be (fake_loss - real_loss).
# We will add the gradient penalty later to this loss function.
def discriminator_loss(real_img, fake_img):
    """Wasserstein critic loss: mean fake score minus mean real score."""
    mean_real = tf.reduce_mean(real_img)
    mean_fake = tf.reduce_mean(fake_img)
    return mean_fake - mean_real
# Define the loss functions for the generator.
def generator_loss(fake_img):
    """Wasserstein generator loss: negated mean critic score on fakes."""
    mean_fake = tf.reduce_mean(fake_img)
    return -mean_fake
# Instantiate the custom `GANMonitor` Keras callback.
cbk = GANMonitor(num_img=3, latent_dim=noise_dim)
# Get the wgan model
wgan = WGAN(
    discriminator=d_model,
    generator=g_model,
    latent_dim=noise_dim,
    discriminator_extra_steps=3,
)
# Compile the wgan model with the Wasserstein losses defined above
wgan.compile(
    d_optimizer=discriminator_optimizer,
    g_optimizer=generator_optimizer,
    g_loss_fn=generator_loss,
    d_loss_fn=discriminator_loss,
)
# Start training
wgan.fit(train_images, batch_size=BATCH_SIZE, epochs=200, callbacks=[cbk])
#save model (generator only; the critic is not needed for sampling)
gen = wgan.generator
gen.save('WGAN.h5')
gen = tf.keras.models.load_model('WGAN.h5', custom_objects={'gaussian_weight_init': gaussian_weight_init})
# FIX: the WGAN generator was built with a `noise_dim` (=128) latent input;
# the previous hard-coded 100 did not match the model's input shape.
random_latent_vectors = tf.random.normal(shape=(25, noise_dim))
imgs = gen.predict(random_latent_vectors)
# scale from [-1,1] to [0,1]
imgs = (imgs + 1) / 2.0
# plot a 5x5 grid of samples
for i in range(5 * 5):
    # define subplot
    plt.subplot(5, 5, 1 + i)
    # turn off axis
    plt.axis('off')
    # plot raw pixel data
    plt.imshow(imgs[i])
plt.savefig('WGAN.png')
plt.show()
Previously, during training, we calculated FID against only the 10K test images. This is efficient, as it is fast to compute, but it is not the most accurate way to calculate FID. We will now calculate it on the full set of 50K train + 10K test = 60K images, since that matches the data we trained on.
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
# reset_real_features=False caches real statistics across fid.reset() calls
fid = FrechetInceptionDistance(reset_real_features=False).to('cuda')
fid.reset()
real_images = loadRealSamples()
real_images = real_images.astype(np.float32)
real_images = torch.from_numpy(np.array(real_images))
#change dtypes, scale back first: undo tanh scaling [-1, 1] -> [0, 255] uint8
real_images = (real_images + 1) * (255 / 2)
real_images = real_images.type(torch.uint8)
#from NxHxWxC (Keras layout) to NxCxHxW (torch layout)
real_images = real_images.permute(0, 3, 1, 2)
real_images = real_images.to('cuda')
#COMPUTE FID
# create a dataloader with batch size 128
dataloader = torch.utils.data.DataLoader(real_images, batch_size=128, shuffle=True,
                                         drop_last=True
                                         )
# update the FID with real images (one-off; real features are cached)
for batch, images in enumerate(dataloader):
    print(f'Updating with real images now...Batch{batch}', end='\r')
    fid.update(images, real=True)
print(f'\nFinished! Ready to take in generated image features')
def computeFID(generator, latentDim):
    """FID of an unconditional generator over 60K samples.

    Draws 60,000 latent vectors of size `latentDim`, generates images,
    rescales them to uint8 channels-first tensors and folds them into the
    shared torchmetrics FID object (real statistics were accumulated above).
    """
    fid.reset()
    #PREPARE DATA
    # FIX: honour the latentDim argument instead of a hard-coded 128 so this
    # also works for generators with other latent sizes (call site passes 128,
    # so behaviour here is unchanged).
    random_latent_vectors = tf.random.normal(shape=(60000, latentDim))
    gen_images = generator.predict(random_latent_vectors)
    gen_images = gen_images.astype(np.float32)
    gen_images = torch.from_numpy(np.array(gen_images))
    # Undo tanh scaling: [-1, 1] -> [0, 255] uint8.
    gen_images = (gen_images + 1) * (255 / 2)
    gen_images = gen_images.type(torch.uint8)
    # NHWC -> NCHW for torchmetrics, then onto the GPU.
    gen_images = gen_images.permute(0, 3, 1, 2)
    gen_images = gen_images.to('cuda')
    # create a dataloader with batch size 128
    dataloader_gen = torch.utils.data.DataLoader(gen_images, batch_size=128, shuffle=True,
                                                 drop_last=True)
    print('')
    # update the FID with generated images
    for batch, images in enumerate(dataloader_gen):
        print(f'Updating FID with generated images: Batch{batch}', end='\r')
        fid.update(images, real=False)
    print('\n')
    score = fid.compute()
    print(f'FID: {score.item()}')
    return score
# Score the reloaded WGAN generator (latent dim 128) against the 60K real set.
computeFID(gen, 128)
print('')
This is our 2nd-best model; with more tuning it could definitely be improved further, but for the sake of time we will stop here. I already had to tune quite a lot just to get the model to converge at all, so I am quite happy with this result.
Our best model so far in terms of FID was our DCGAN, with an FID of 32. Let's tune the following parameters to try to improve it further.
def createImageGen():
    """Return an ImageDataGenerator that lightly augments CIFAR-10 batches."""
    augmentations = dict(
        rotation_range=15,
        width_shift_range=0.15,
        height_shift_range=0.15,
        zoom_range=0.2,
        horizontal_flip=True,
    )
    return tf.keras.preprocessing.image.ImageDataGenerator(**augmentations)
#function to reverse scaling/preprocessing steps, for data visualisation purposes
def reverseProcess(X):
    """Map images from the tanh range [-1, 1] back to displayable uint8 [0, 255]."""
    rescaled = X * 127.5 + 127.5
    return rescaled.astype('uint8')
datagen = createImageGen()
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
# reset_real_features=False caches real statistics across fid.reset() calls
fid = FrechetInceptionDistance(reset_real_features=False).to('cuda')
fid.reset()
# NOTE: the tuning runs score FID against the 10K *test* images — faster than
# the full 60K set used for final evaluation
real_images = loadTestSamples()
real_images = real_images.astype(np.float32)
real_images = torch.from_numpy(np.array(real_images))
#change dtypes, scale back first: undo tanh scaling [-1, 1] -> [0, 255] uint8
real_images = (real_images + 1) * (255 / 2)
real_images = real_images.type(torch.uint8)
#from NxHxWxC (Keras layout) to NxCxHxW (torch layout)
real_images = real_images.permute(0, 3, 1, 2)
real_images = real_images.to('cuda')
#COMPUTE FID
# create a dataloader with batch size 128
dataloader = torch.utils.data.DataLoader(real_images, batch_size=128, shuffle=True,
                                         drop_last=True
                                         )
# update the FID with real images (one-off; real features are cached)
for batch, images in enumerate(dataloader):
    print(f'Updating with real images now...Batch{batch}', end='\r')
    fid.update(images, real=True)
print(f'\nFinished! Ready to take in generated image features')
def computeFID(generator, latentDim):
    """FID of `generator` against the cached 10K test-set statistics.

    Generates 10,000 fake images, converts them to uint8 channels-first
    tensors on the GPU and folds them into the shared torchmetrics FID object.
    """
    fid.reset()
    # generateFakeSamples returns (images, labels); keep only the images.
    samples = generateFakeSamples(generator, latentDim, 10000)[0]
    samples = samples.astype(np.float32)
    tensor = torch.from_numpy(np.array(samples))
    # Undo tanh scaling ([-1, 1] -> [0, 255] uint8), NHWC -> NCHW, onto GPU.
    tensor = ((tensor + 1) * (255 / 2)).type(torch.uint8)
    tensor = tensor.permute(0, 3, 1, 2).to('cuda')
    loader = torch.utils.data.DataLoader(tensor, batch_size=128, shuffle=True,
                                         drop_last=True)
    print('')
    for batch_idx, images in enumerate(loader):
        print(f'Updating FID with generated images: Batch{batch_idx}', end='\r')
        fid.update(images, real=False)
    print('\n')
    score = fid.compute()
    print(f'FID: {score.item()}')
    return score
# define the standalone discriminator model
def defineDisc(in_shape=(32, 32, 3)):
    """Build and compile the DCGAN discriminator.

    One same-size conv stage followed by three stride-2 downsamples
    (32 -> 16 -> 8 -> 4), then dropout and a sigmoid real/fake output.
    Compiled with Adam + binary cross-entropy.
    """
    model = tf.keras.models.Sequential()
    # normal
    model.add(tf.keras.layers.Conv2D(128, (3, 3), padding='same', input_shape=in_shape, kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # downsample
    model.add(tf.keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # classifier
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.4))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    # compile model
    # FIX: use `learning_rate` (the `lr` alias is deprecated and removed in
    # newer Keras), matching the optimizer calls elsewhere in this notebook.
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
# define the standalone generator model
def defineGen(latent_dim, k):
    """Build the (uncompiled) DCGAN generator: latent vector -> 32x32x3 tanh image.

    `k` is the transposed-convolution kernel size being tuned.
    """
    model = tf.keras.models.Sequential()
    # foundation for 4x4 image: project and reshape to a 4x4x256 feature map
    model.add(tf.keras.layers.Dense(256 * 4 * 4, input_dim=latent_dim))
    model.add(tf.keras.layers.BatchNormalization(momentum=0.9))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(tf.keras.layers.Reshape((4, 4, 256)))
    # three stride-2 upsampling stages: 4 -> 8 -> 16 -> 32
    for filters in (128, 128, 64):
        model.add(tf.keras.layers.Conv2DTranspose(filters, kernel_size=k, strides=(2, 2), padding='same', kernel_initializer=gaussian_weight_init))
        model.add(tf.keras.layers.BatchNormalization(momentum=0.9))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    # output layer: tanh keeps pixels in [-1, 1]
    model.add(tf.keras.layers.Conv2D(3, (3, 3), activation='tanh', padding='same'))
    return model
Now we have a dynamic scheduler for both discriminator and generator which simply takes one initial learning rate and scales them down depending on epoch count
#Learning rate schedulers
def scheduler(epoch, LR):
    """Step-decay schedule: return the base LR scaled down as epochs advance.

    Full LR below epoch 30, then 0.8x, 0.65x and 0.55x at epochs 30/60/120.
    """
    if epoch < 30:
        return LR
    for bound, factor in ((60, 0.8), (120, 0.65)):
        if epoch < bound:
            return LR * factor
    return LR * 0.55
# define the combined generator and discriminator model, for updating the generator
def defineGAN(genModel, discModel):
    """Stack generator + frozen discriminator and compile for generator updates.

    The discriminator is marked non-trainable *before* compiling the stacked
    model, so only the generator's weights move when this model is trained.
    """
    # make weights in the discriminator not trainable
    discModel.trainable = False
    # connect them
    model = tf.keras.models.Sequential()
    # add generator
    model.add(genModel)
    # add the discriminator
    model.add(discModel)
    # compile model
    # FIX: `lr` is a deprecated alias removed in newer Keras; use
    # `learning_rate`, consistent with the other optimizer calls in this file.
    opt = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model
# train the generator and discriminator
def trainGAN(genModel, discModel, gan_model, dataset, latentDim, n_epochs=200, n_batch=128, fid_frequency=50, sumFrequency=2, name='model', best_FID=500, dLR=0.00015, gLR=0.0002):
    """Adversarial training loop with augmented real batches, smoothed real
    labels, per-epoch LR scheduling, periodic FID evaluation and best-model
    checkpointing to models/{name}.h5. Returns the best (lowest) FID seen.

    NOTE(review): `sumFrequency` is accepted but never used below —
    summarisePerf2 runs every epoch; confirm whether gating was intended.
    """
    bat_per_epo = int(dataset.shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    bestFID = best_FID  #previous best FID
    # augmented real-image stream (note: `iter` shadows the builtin)
    iter = datagen.flow(dataset, y=None, batch_size=half_batch)  #half batch size
    # manually enumerate epochs
    for i in range(n_epochs):
        #set learning rates via schedulers
        K.set_value(discModel.optimizer.learning_rate, scheduler(i, dLR))
        K.set_value(gan_model.optimizer.learning_rate, scheduler(i, gLR))
        # enumerate batches over the training set
        for j in range(bat_per_epo):
            # get randomly selected augmented samples
            X_real = iter.next()
            y_real = np.ones((len(X_real), 1))  #get 'real' labels
            #smoothing labels
            y_real = smooth_positive_labels(y_real)
            # update discriminator model weights on reals
            d_loss1, _ = discModel.train_on_batch(X_real, y_real)
            # generate 'fake' examples
            X_fake, y_fake = generateFakeSamples(genModel, latentDim, half_batch)
            # update discriminator model weights on fakes
            d_loss2, _ = discModel.train_on_batch(X_fake, y_fake)
            # prepare points in latent space as input for the generator
            X_gan = generateLatentPoints(latentDim, n_batch)
            # create inverted labels for the fake samples
            y_gan = np.ones((n_batch, 1))
            # update the generator via the discriminator's error
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            #progress bar per epoch
            if j % 25 == 0 or j == bat_per_epo - 1:
                # round the current LRs to one significant figure for display
                dLR_sf = round(discModel.optimizer.learning_rate.numpy() / 10 ** math.floor(math.log10(discModel.optimizer.learning_rate.numpy())), 1) * 10 ** math.floor(math.log10(discModel.optimizer.learning_rate.numpy()))
                gLR_sf = round(gan_model.optimizer.learning_rate.numpy() / 10 ** math.floor(math.log10(gan_model.optimizer.learning_rate.numpy())), 1) * 10 ** math.floor(math.log10(gan_model.optimizer.learning_rate.numpy()))
                if j == bat_per_epo - 1:  #if last step
                    print(f'>epoch{i+1}, {j+1}/{bat_per_epo}, dLR:{dLR_sf}, gLR:{gLR_sf}, d1={d_loss1:.3f}, d2={d_loss2:.3f}, g={g_loss:.3f}')
                else:
                    print(f'>epoch{i+1}, {j+1}/{bat_per_epo}, dLR:{dLR_sf}, gLR:{gLR_sf}, d1={d_loss1:.3f}, d2={d_loss2:.3f}, g={g_loss:.3f}',
                          end='\r')
        #compute FID for every interval and last epoch
        if (i > 0 and (i + 1) % fid_frequency == 0) or (i + 1) == n_epochs:
            # rebuild a standalone generator carrying the GAN's current weights
            updated_gen_model = tf.keras.models.Sequential()
            updated_gen_model.add(gan_model.layers[0])
            # Set the weights of the new model to be the same as the generator portion of the gan_model
            updated_gen_model.set_weights(gan_model.layers[0].get_weights())
            FID = computeFID(updated_gen_model, latentDim)
            if FID < bestFID:
                print('saving best model')
                bestFID = FID
                #save model (rebuilt again so the saved copy is standalone)
                updated_gen_model = tf.keras.models.Sequential()
                updated_gen_model.add(gan_model.layers[0])
                # Set the weights of the new model to be the same as the generator portion of the gan_model
                updated_gen_model.set_weights(gan_model.layers[0].get_weights())
                updated_gen_model.save(f'models/{name}.h5')
        # evaluate the model performance every few epochs
        summarisePerf2(i, genModel, discModel, dataset, latentDim)
    return bestFID  #return best FID score
%%time
# Notebook cell: sweep the generator kernel-size setting k and record the best
# FID reached by each configuration.  (%%time reports the cell's wall time.)
results={} #dictionary mapping k -> best FID achieved during training
kList=[3,4,5]  # candidate k values to try
# load image data (project helper; presumably preprocessed CIFAR-10 scaled to [-1, 1] — TODO confirm)
x_train = loadRealSamples()
# size of the latent space
latent_dim = 128
for k in kList:
    print(f'************ NOW TESTING: k={k}***************')
    bestFID=500 #reset FID to a large sentinel so any real score beats it
    # create the discriminator
    discModel = defineDisc()
    # create the generator (k controls an architecture choice inside defineGen)
    genModel = defineGen(latent_dim,k)
    # create the gan
    gan_model = defineGAN(genModel, discModel)
    # train model; trainGAN returns the best FID seen across all FID evaluations
    bestFID=trainGAN(genModel, discModel, gan_model, x_train,
        latent_dim, n_epochs=125,n_batch=64,fid_frequency=10,sumFrequency=1,name=f'modelHPK{k}',best_FID=bestFID)
    # NOTE(review): the LR sweep stores bestFID.item() while this cell stores the
    # raw value — confirm whether trainGAN returns a tensor here and unify if so.
    results[k]=bestFID #add result to dictionary
    print(f'Best for k={k}: {bestFID}')
The best value found was k=3.
Next, the discriminator and generator learning rates are tuned. It is better to tune these two together, as paired combinations, because they have a close relationship: the balance between the two learning rates determines whether either network overpowers the other during adversarial training.
%%time
# Notebook cell: jointly sweep the discriminator and generator learning rates
# (using the best k from the previous sweep) and record the best FID for each pair.
import os
#SELECT K FROM PREVIOUS TUNING
k=3
results={} #dictionary mapping (dLR, gLR) -> best FID achieved
#3 combinations of (discriminator LR, generator LR) to try
lrList=[(0.00015,0.00025),(0.0002,0.0002),(0.00025,0.00015)]
# load image data
x_train = loadRealSamples()
# size of the latent space
latent_dim = 128
for dLR,gLR in lrList:
    print(f'************ NOW TESTING: dLR={dLR},gLR={gLR}***************')
    bestFID=500  # large sentinel; any real FID will be lower
    # create the discriminator
    discModel = defineDisc()
    # create the generator
    genModel = defineGen(latent_dim,k)
    # create the gan
    gan_model = defineGAN(genModel, discModel)
    # train model with this candidate learning-rate pair
    bestFID=trainGAN(genModel, discModel, gan_model, x_train,
        latent_dim, n_epochs=125,n_batch=64,fid_frequency=10,sumFrequency=1,name=f'modelHP{dLR}D{gLR}G',best_FID=bestFID,
        dLR=dLR,gLR=gLR)
    # .item() extracts a Python float — assumes trainGAN returned a tensor here
    results[(dLR,gLR)]=bestFID.item() #add result to dictionary
    print(f'Best for dLR={dLR} and gLR={gLR}: {bestFID}')
print(results)
Best parameters found: dLR=0.00015, gLR=0.00025 (the checkpoint loaded below).
#load the best checkpoint from the learning-rate sweep (dLR=0.00015, gLR=0.00025).
# gaussian_weight_init is a project-defined initializer that Keras needs in
# custom_objects to deserialize the saved model.
bestModel=tf.keras.models.load_model('models/modelHP0.00015D0.00025G.h5',custom_objects={'gaussian_weight_init': gaussian_weight_init})
bestModel.summary()
# Build a persistent torchmetrics FID metric and preload it with the real-image
# statistics ONCE.  reset_real_features=False keeps the cached real features
# across fid.reset() calls, so only generated-image stats are recomputed later.
from torchmetrics.image.fid import FrechetInceptionDistance
import torch
fid=FrechetInceptionDistance(reset_real_features=False).to('cuda')
fid.reset()
real_images = loadRealSamples()
real_images = real_images.astype(np.float32)
real_images=torch.from_numpy(np.array(real_images))
#change dtypes, scale back first: (x+1)*(255/2) maps assumed [-1, 1] samples to
#[0, 255], then cast to uint8 as FrechetInceptionDistance expects
real_images=(real_images+1)*(255/2)
real_images=real_images.type(torch.uint8)
#from 50,000x32x32x3 (NHWC) to 50,000x3x32x32 (NCHW, the torch convention)
real_images = real_images.permute(0, 3, 1, 2)
real_images = real_images.to('cuda')
#COMPUTE FID
# create a dataloader with batch size 128; drop_last avoids a ragged final batch
dataloader = torch.utils.data.DataLoader(real_images, batch_size=128, shuffle=True,
    drop_last=True
)
# update the FID with real images (accumulates Inception features per batch)
for batch,images in enumerate(dataloader):
    print(f'Updating with real images now...Batch{batch}',end='\r')
    fid.update(images, real=True)
print(f'\nFinished! Ready to take in generated image features')
def computeFID(generator, latentDim, n_samples=60000, batch_size=128):
    """Compute the FID between images sampled from `generator` and the real-image
    statistics already accumulated in the module-level `fid` metric.

    Parameters
    ----------
    generator : Keras model mapping (n, latentDim) latent vectors to NHWC images,
        assumed to be in [-1, 1] — the same range the real images were rescaled from.
    latentDim : int, size of the generator's latent input.
    n_samples : int, number of images to generate (default 60000, the original count).
    batch_size : int, batch size when feeding images into the FID metric.

    Returns
    -------
    torch.Tensor scalar holding the FID score (lower is better).
    """
    # clear previously-accumulated *generated* statistics; the real-image features
    # survive because fid was constructed with reset_real_features=False
    fid.reset()
    # PREPARE DATA: sample latent vectors.
    # BUGFIX: the original hard-coded shape=(60000, 128), silently ignoring the
    # latentDim parameter — now the parameter is actually used.
    random_latent_vectors = tf.random.normal(shape=(n_samples, latentDim))
    gen_images = generator.predict(random_latent_vectors)
    gen_images = gen_images.astype(np.float32)
    gen_images = torch.from_numpy(np.array(gen_images))
    # rescale from [-1, 1] to [0, 255] and cast to uint8, as the metric expects
    gen_images = (gen_images + 1) * (255 / 2)
    gen_images = gen_images.type(torch.uint8)
    # NHWC -> NCHW (torch convention)
    gen_images = gen_images.permute(0, 3, 1, 2)
    gen_images = gen_images.to('cuda')
    # batch the generated images; drop_last avoids a ragged final batch
    dataloader_gen = torch.utils.data.DataLoader(gen_images, batch_size=batch_size,
                                                 shuffle=True, drop_last=True)
    print('')
    # update the FID with generated images
    for batch, images in enumerate(dataloader_gen):
        print(f'Updating FID with generated images: Batch{batch}', end='\r')
        fid.update(images, real=False)
    print('\n')
    score = fid.compute()
    print(f'FID: {score.item()}')
    return score
# score the reloaded best model; 128 matches the latent_dim used during training
computeFID(bestModel,128)
print('')
Our final tuned DCGAN model achieved an FID of 29, which is quite decent. Compared to state-of-the-art benchmarks, this corresponds to roughly 100th place. The best state-of-the-art models reach FID below 10, but those are much more complex architectures, and many rely on the PyTorch StyleGAN library for stronger implementations. Overall, I am quite satisfied with this work. The final 1000 generated images can be found in the finalImgs folder for the final DCGAN, CGAN, and WGAN respectively.
# Reload the final DCGAN generator and write 1000 sample images to disk.
# gaussian_weight_init is a project-defined initializer required for deserialization.
bestModel=tf.keras.models.load_model('models/finalDCGAN.h5',custom_objects={'gaussian_weight_init': gaussian_weight_init})
bestModel.summary()
# draw 1000 fake samples (latent dim 100) and undo the training-time preprocessing
imgs=reverseProcess(generateFakeSamples(bestModel, 100, 1000)[0])
from PIL import Image
# save each sample as finalImgs/<index>.png
for idx, pixels in enumerate(imgs):
    Image.fromarray(pixels).convert('RGB').save(f"finalImgs/{idx}.png")
# Draw a fresh batch of 1000 samples and preview the first 16 in a 4x4 grid.
imgs=reverseProcess(generateFakeSamples(bestModel, 100, 1000)[0])
for idx in range(16):
    # one subplot per image, axes hidden, raw pixel data shown
    ax = plt.subplot(4, 4, idx + 1)
    ax.axis('off')
    ax.imshow(imgs[idx])
plt.show()
#Zip up whole folder: tar the entire ../ca2 project directory into final.tar for submission
import shutil
shutil.make_archive('final', 'tar', '../ca2')